*************************************************************   
* do-file to create variables from HIS Graduate Panel Study	*
* dataset for persons with < 15 hrs of employment			*
* fo 16.12.2015												*
*************************************************************

* --- session setup ---
* Interpret this do-file under Stata 14 rules and never pause output for -more-.
version 14
set more off

* Working directory; the relative "Data\..." paths used by this do-file resolve against it.
cd "E:\Forschung_offline\02_Bobbitt-Zeher\"
use "Data\ZA4272.dta", clear

* Declare X1gewinsg as the sampling (probability) weight for svy commands.
svyset [pweight = X1gewinsg]

* Shared value labels for 0/1 indicators (German label text: nein = no, ja = yes).
label define noyes    0 "nein" 1 "ja"
label define noneuyes 0 "nein oder neutral" 1 "ja"

*** dependent variable: annual income ***
* Monthly income: codes -2 and -1 of C2Jeinkom become missing.
recode C2Jeinkom (-2 -1 = .), generate(inc_mon)
* Annual income is twelve times the monthly figure.
generate inc_year    = inc_mon * 12
* Log versions; ln() returns missing for zero, negative or missing input.
generate ln_inc_mon  = ln(inc_mon)
generate ln_inc_year = ln(inc_year)

* employment
* G2zterw: 1 stays 1, 2 becomes 0; any other code is carried over unchanged.
recode G2zterw (1 = 1) (2 = 0), generate(employ)

*** independent variables ***
* gender *
* frau: 0 = male (source code 1), 1 = female (source code 2); source code -1 becomes missing.
recode K1geschl (-1 = .) (1 = 0) (2 = 1), generate(frau)
label variable frau "gender"
label define frau 0 "male" 1 "female"
label values frau frau

* Observations without gender information are removed from the data.
drop if frau == .

* mann is the mirror image of frau (1 = male, 0 = female).
recode frau (0 = 1) (1 = 0), generate(mann)

* ses of family of origin *
* Three 0/1 indicators are built first (codes -1 and 7 become missing in each);
* source codes not named in a rule are carried over unchanged by -recode-.
recode K1bervat   (-1 7 = .) (4 = 1)   (1 2 3 5 = 0), generate(arbeitervat)
recode K1berabvat (-1 7 = .) (1 2 = 1) (3/6 = 0),     generate(hsvat)
recode K1berabmut (-1 7 = .) (1 2 = 1) (3/6 = 0),     generate(hsmut)

label variable arbeitervat "father worker"
label variable hsvat       "father has college degree"
label variable hsmut       "mother has college degree"
label values arbeitervat hsvat hsmut noyes

* Additive index: +1 per parent with a college degree, -1 if the father is a worker.
* The result is missing whenever any of the three components is missing.
generate parses = hsvat + hsmut - arbeitervat
label variable parses "parent SES"

* The component indicators are only needed to build the index.
drop hsvat hsmut arbeitervat

* importance of having lots of money *
* Reverse the 1..5 source scale onto 4..0; code -1 becomes missing.
recode C1zigeld (-1 = .) (1 = 4) (2 = 3) (3 = 2) (4 = 1) (5 = 0), generate(zigeld)
label variable zigeld "Ziel sehr gut verdienen"

* family characteristics *
* kinder: source code 1 -> 1, 2 -> 0, -1 -> missing.
recode K2kinder (-1 = .) (1 = 1) (2 = 0), generate(kinder)
label variable kinder "Kinder"
label values kinder noyes

* verhei: source code 3 -> 1, codes 1 and 2 -> 0, -1 -> missing.
recode K2famsta (-1 = .) (1 2 = 0) (3 = 1), generate(verhei)
label variable verhei "Verheiratet"
label values verhei noyes

* Interactions of each family indicator with the female dummy.
generate kinderxfrau = kinder * frau
generate verheixfrau = verhei * frau

* hours worked per month *
* Weekly hours from C2wazhau; code -1 becomes missing.
recode C2wazhau (-1 = .), generate(arbzt_woch)
* Respondents flagged as not employed are assigned zero hours.
replace arbzt_woch = 0 if employ == 0
* Monthly hours are approximated as four weeks of weekly hours.
generate arbzt_mon   = arbzt_woch * 4
* ln(0) is missing, so zero-hour cases have a missing log.
generate lnarbzt_mon = ln(arbzt_mon)

* Standardized Test Scores; Grades *
* Abitur / High School Leaving Certificate *

* --- Western Germany: K1abnotbrd ---
* Code -1 becomes missing (the source variable is recoded in place).
recode K1abnotbrd (-1 = .)
* Reverse the scale so that a higher value is a better grade.
generate abinotebrd = 40 - K1abnotbrd
* z-standardize over all observations in memory.
egen abinotebrd_mean = mean(abinotebrd)
egen abinotebrd_sd   = sd(abinotebrd)
generate z_abinotebrd = (abinotebrd - abinotebrd_mean) / abinotebrd_sd

* --- Eastern Germany: K1abnotddr ---
* Code -1 becomes missing and codes 1..4 are rescaled to 10..40 (in place),
* so the same "40 minus grade" reversal can be applied.
recode K1abnotddr (-1 = .) (1 = 10) (2 = 20) (3 = 30) (4 = 40)
generate abinoteddr = 40 - K1abnotddr
egen abinoteddr_mean = mean(abinoteddr)
egen abinoteddr_sd   = sd(abinoteddr)
generate z_abinoteddr = (abinoteddr - abinoteddr_mean) / abinoteddr_sd

* Combined score: start from the Western z-score and let a non-missing
* Eastern z-score take precedence.
generate z_abinote = z_abinotebrd
replace  z_abinote = z_abinoteddr if z_abinoteddr != .

* Remove the helper variables (abinotebrd itself is kept in the data).
drop abinotebrd_mean abinotebrd_sd z_abinotebrd ///
	abinoteddr abinoteddr_mean abinoteddr_sd z_abinoteddr

* college leaving certificate *
* Exam grade and exam points; codes -2 and -1 become missing in both.
recode B1examnote (-2 -1 = .), generate(ab_note)
recode B1punkte   (-2 -1 = .), generate(ab_punkte)

* Reverse the grade so that higher is better; for major code 28 the points
* score is used instead of the reversed grade.
generate ab_note2 = 60 - ab_note
replace  ab_note2 = ab_punkte if B1ber1ab1 == 28

* z-standardize within each major (B1ber1ab1).
bysort B1ber1ab1: egen abnot_mean = mean(ab_note2)
bysort B1ber1ab1: egen abnot_sd   = sd(ab_note2)
generate z_examen = (ab_note2 - abnot_mean) / abnot_sd

* Drop the intermediates by name; z_examen is kept.
drop ab_note ab_punkte ab_note2 abnot_mean abnot_sd

* college major *
* Observations with major code -1 are removed before anything else is built.
drop if B1ber1ab1 == -1
recode B1ber1ab1 (-1 = .), generate(fach)
* One dummy (fachdum1, fachdum2, ...) per observed major code.
quietly tabulate B1ber1ab1, generate(fachdum)

* Teaching track *
* B1absart1 codes 5-11 -> 1; codes 1-4 and 12-16 -> 0.
recode B1absart1 (5/11 = 1) (1/4 12/16 = 0), generate(lehramt)

* Collapse the major codes into seven groups. The statements are order
* dependent: a later -replace- overrides an earlier one, so the lehramt rule
* in group 4 can itself be overridden by groups 5-7.
generate fachgruppe = .

* 1: Business, Econ. Sciences
replace fachgruppe = 1 if inlist(B1ber1ab1, 29, 30)

* 2: Math, Natural Science, Engineering
replace fachgruppe = 2 if inlist(B1ber1ab1, 31, 36, 37, 38, 39, 40, 41, 42, 43, 44) | ///
	inlist(B1ber1ab1, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69)

* 3: Humanities and Social Science
replace fachgruppe = 3 if inlist(B1ber1ab1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14) | ///
	inlist(B1ber1ab1, 23, 25, 26, 27)

* 4: Education (majors 16/17 or anyone on the teaching track)
replace fachgruppe = 4 if inlist(B1ber1ab1, 16, 17) | lehramt == 1

* 5: Medical Science
replace fachgruppe = 5 if inlist(B1ber1ab1, 15, 22, 49, 50, 51)

* 6: Art
replace fachgruppe = 6 if inlist(B1ber1ab1, 74, 76, 77, 78)

* 7: Law
replace fachgruppe = 7 if B1ber1ab1 == 28

* Code -1 never belongs to a group.
replace fachgruppe = . if B1ber1ab1 == -1

label variable fachgruppe "Fachgruppe"
label define fachgruppe 1 "Business, Econ.Sci." 2 "Math, Nat.Sci. Engin." 3 "Soc.Sci, Humanities" 4 "Education" 5 "Medical Science" 6 "Art" 7 "Law"
label values fachgruppe fachgruppe

* Group dummies; -tabulate- numbers them by sorted category, so the renames
* below rely on all seven groups being present in the data.
tabulate fachgruppe, generate(fachgr_)
rename (fachgr_1 fachgr_2 fachgr_3 fachgr_4 fachgr_5 fachgr_6 fachgr_7) ///
	(fachgr_wiwi fachgr_natur fachgr_geist fachgr_erzieh fachgr_med fachgr_kunst fachgr_jura)

*** percentage female of major ***
* Mean of the 0/1 female dummy within each major code = share of women in that major.
bysort B1ber1ab1: egen fa_frau = mean(frau)

* highest degree *
* promo_fertig: D2promo code 1 -> 1; codes -2, 2, 3 and 4 -> 0; other codes unchanged.
recode D2promo (-2 2 3 4 = 0) (1 = 1), generate(promo_fertig)
label variable promo_fertig "Abgeschlossene Promotion"
* keine_promo is the complement of promo_fertig.
recode promo_fertig (1 = 0) (0 = 1), generate(keine_promo)

* institutional selectivity *
* unifh: B1hsart code 1 -> 1, code 2 -> 0, labelled 0 "Uni" / 1 "FH".
recode B1hsart (1 = 1) (2 = 0), generate(unifh)
label variable unifh "Uni oder FH"
label define unifh 0 "Uni" 1 "FH"
label values unifh unifh


*********************
*** select sample ***
*********************
* The income cap below is read from the global macro censor, which this
* do-file never sets. Without it the -drop- would expand to "drop if inc_year >"
* and stop with a bare syntax error after the sample has already been cut, so
* fail up front with a clear message instead.
* NOTE(review): censor is presumably set by a calling do-file - confirm.
if `"$censor"' == "" {
	display as error "global macro censor is not defined; define it before running this do-file (it is used as the upper bound in: drop if inc_year > \$censor)"
	exit 198
}

* Keep the non-employed and those working more than 0 but fewer than 15 hours per week.
keep if employ == 0 | (arbzt_woch > 0 & arbzt_woch < 15)

* Age in 2002; birth-year code -1 becomes missing (recoded in place).
recode K1gebjahr (-1 = .)
gen age = 2002 - K1gebjahr
* NOTE(review): Stata orders missing above every number, so this also drops
* observations whose age is missing - confirm that this is intended.
drop if age > 34				/* no one who was older than 30 when graduating from college */

drop if C2wazneb != -2			/* hours worked is unreliable for persons with multiple jobs */

* Hourly wage; it is missing when monthly hours are zero (division by zero) or
* when either input is missing, and such cases are not touched by the next line.
gen wage = inc_mon / arbzt_mon
drop if wage >= 0 & wage <= 2		/* as in Bobbitt-Zeher, less than 2 Euros per hr is an implausibly low wage, 3 obs dropped */

* stdev is created before the cap is applied and is kept in the data.
egen stdev = sd(inc_year)
* NOTE(review): as with age, a missing inc_year satisfies "> $censor" and is dropped here.
drop if inc_year > $censor	 	/* as in Bobbitt-Zeher */

drop if frau == . | kinder == . | verhei == .	/* don't want to impute that */

* Per-observation count of missing values across the analysis variables.
egen miss = rowmiss(inc_year z_abinote z_examen fachgr_wiwi fachgr_natur fachgr_geist ///
	fachgr_erzieh fachgr_med fachgr_kunst fachgr_jura fa_frau keine_promo promo_fertig unifh ///
	parses zigeld verhei kinder)
tab miss
	
************************************
*** imputation of missing values ***
************************************
* Fix the row order before any random numbers are drawn.
sort id_suf

* define upper and lower bounds for certain variables *
* z-scores: bounds are the observed minimum and maximum.
foreach v in z_abinote z_examen {
	quietly summarize `v'
	generate `v'_l = r(min)
	generate `v'_u = r(max)
}
* Index and scale variables: fixed bounds.
generate parses_l = -1
generate parses_u = 2
generate zigeld_l = 0
generate zigeld_u = 4

* Variable lists used to register the data with -mi-.
local incomplete "z_abinote z_examen parses zigeld"
local complete "kinder verhei fachgr_wiwi fachgr_natur fachgr_geist fachgr_erzieh fachgr_med fachgr_kunst fachgr_jura promo_fertig unifh fa_frau"

sort id_suf
mi set wide
mi register imputed `incomplete'
mi register regular `complete' frau X1gewinsg
mi svyset [pweight = X1gewinsg]

* Chained linear-regression imputation, run separately by gender, 30 imputations,
* fixed seed. The variable order is left exactly as is so the seeded draws do not change.
mi impute chained (regress) z_abinote z_examen parses zigeld ///
	= kinder verhei arbzt_woch fachgr_wiwi fachgr_natur fachgr_geist ///
	fachgr_erzieh fachgr_med fachgr_kunst fachgr_jura promo_fertig ///
	unifh fa_frau, by(frau) replace add(30) rseed(56834)

* censor imputed values
* Only the first imputation (the _1_ variables of the wide layout) is processed:
* imputed values outside the bounds are clamped, then copied into the original
* variable wherever that variable is missing.
foreach v in z_abinote z_examen parses zigeld {
	display " "
	* clamp from below
	replace _1_`v' = `v'_l if _1_`v' < `v'_l & `v' == .
	* clamp from above
	replace _1_`v' = `v'_u if _1_`v' > `v'_u & `v' == .
	* fill the gaps in the original variable with the (clamped) imputed value
	replace `v' = _1_`v' if `v' == .
}

summarize inc_year z_abinote z_examen parses zigeld

save "Data\imputed_noempl.dta", replace
*** END OF DOFILE ***
